In [1]:
import numpy as np
import pandas as pd
In [2]:
# Set some Pandas options
pd.set_option('html', False)
pd.set_option('max_columns', 30)
pd.set_option('max_rows', 10)
In [3]:
data = pd.read_hdf('/var/datasets/dshs/CD2007Q1/reduced_PUDF_base1q2007.h5','data')
Based on the example in http://www.christianpeccei.com/zipmap/
ZIP area data downloaded from ftp://ftp.cs.brown.edu/u/spr/zipdata
The mapping from states to numbers can be seen here: https://github.com/ssoper/zip-code-boundaries/blob/master/raw.html
In [4]:
import os.path
if not os.path.exists('zipdata/zt06_d00_ascii.zip'):
!wget -P zipdata ftp://ftp.cs.brown.edu/u/spr/zipdata/zt06_d00_ascii.zip
!unzip -d zipdata zipdata/zt06_d00_ascii.zip
In [5]:
if not os.path.exists('zipdata/zt48_d00_ascii.zip'):
!wget -P zipdata ftp://ftp.cs.brown.edu/u/spr/zipdata/zt48_d00_ascii.zip
!unzip -d zipdata zipdata/zt48_d00_ascii.zip
In [6]:
def read_ascii_boundary(filestem):
'''
Reads polygon data from an ASCII boundary file.
Returns a dictionary with polygon IDs for keys. The value for each
key is another dictionary with three keys:
'name' - the name of the polygon
'polygon' - list of (longitude, latitude) pairs defining the main
polygon boundary
'exclusions' - list of lists of (lon, lat) pairs for any exclusions in
the main polygon
'''
metadata_file = filestem + 'a.dat'
data_file = filestem + '.dat'
# Read metadata
lines = [line.strip().strip('"') for line in open(metadata_file)]
polygon_ids = lines[::6]
polygon_names = lines[2::6]
polygon_data = {}
for polygon_id, polygon_name in zip(polygon_ids, polygon_names):
# Initialize entry with name of polygon.
# In this case the polygon_name will be the 5-digit ZIP code.
polygon_data[polygon_id] = {'name': polygon_name}
del polygon_data['0']
# Read lon and lat.
f = open(data_file)
for line in f:
fields = line.split()
if len(fields) == 3:
# Initialize new polygon
polygon_id = fields[0]
polygon_data[polygon_id]['polygon'] = []
polygon_data[polygon_id]['exclusions'] = []
elif len(fields) == 1:
# -99999 denotes the start of a new sub-polygon
if fields[0] == '-99999':
polygon_data[polygon_id]['exclusions'].append([])
else:
# Add lon/lat pair to main polygon or exclusion
lon = float(fields[0])
lat = float(fields[1])
if polygon_data[polygon_id]['exclusions']:
polygon_data[polygon_id]['exclusions'][-1].append((lon, lat))
else:
polygon_data[polygon_id]['polygon'].append((lon, lat))
return polygon_data
In [7]:
import csv
from pylab import *
From: http://mpld3.github.io/
In [8]:
import mpld3
if True:
mpld3.enable_notebook()
In [9]:
reduced = data[['Pat_ZIP', 'Total_Charges']]
chargesbyzip = reduced.groupby('Pat_ZIP').mean()
countbyzip = reduced.groupby('Pat_ZIP').count()
In [10]:
#def makezipfigure(series, zipstem = 'zipdata/zt48_d00'):
series = chargesbyzip
series = countbyzip
zipstem = 'zipdata/zt48_d00'
maxvalue = series.max().values[0]
valuename = series.keys()[0]
# Read in ZIP code boundaries for Te
d = read_ascii_boundary(zipstem)
# Create figure and two axes: one to hold the map and one to hold
# the colorbar
figure(figsize=(5, 5), dpi=100)
map_axis = axes([0.0, 0.0, 0.8, 0.9])
cb_axis = axes([0.83, 0.1, 0.03, 0.8])
#map_axis = axes([0.0, 0.0, 4.0, 4.5])
#cb_axis = axes([4.15, 0.5, 0.15, 3.0])
#map_axis = axes([0.0, 0.0, 1.6, 1.8])
#cb_axis = axes([1.66, 0.2, 0.06, 1.2])
# Define colormap to color the ZIP codes.
# You can try changing this to cm.Blues or any other colormap
# to get a different effect
cmap = cm.PuRd
# Create the map axis
axes(map_axis)
gca().set_axis_off()
# Loop over the ZIP codes in the boundary file
for polygon_id in d:
polygon_data = array(d[polygon_id]['polygon'])
zipcode = d[polygon_id]['name']
try:
value = series.xs(zipcode).values[0]
# Define the color for the ZIP code
fc = cmap(float(value) / maxvalue)
except:
fc = (1.0, 1.0, 1.0, 1.0)
edgecolor = [ square(min(fc[:3])) ]*3 + [0.5]
# Draw the ZIP code
patch = Polygon(array(polygon_data), facecolor=fc,
edgecolor=edgecolor, linewidth=.1)
# patch = Polygon(array(polygon_data), facecolor=fc,
# edgecolor=(.5, .5, .5, 1), linewidth=.2)
gca().add_patch(patch)
gca().autoscale()
title(valuename + " per ZIP Code in Texas")
# Draw colorbar
cb = mpl.colorbar.ColorbarBase(cb_axis, cmap=cmap,
norm = mpl.colors.Normalize(vmin=0, vmax=maxvalue))
cb.set_label(valuename)
savefig('texas.pdf', dpi=100)
# Change all fonts to Arial
#for o in gcf().findobj(matplotlib.text.Text):
# o.set_fontname('Arial')
There might be an algorithm to calculate the area of the convex hull
In [ ]: